{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6fb7858c-8ea7-4dea-95ea-f5d7d5210b9a",
   "metadata": {},
   "source": [
    "This notebook is a **meeting minutes generator**: it transcribes an audio recording with **Whisper (OpenAI's open-source speech-recognition model)** and then writes the minutes with **Qwen2**. See the following Colab notebook for example outputs:\n",
    "\n",
    "https://colab.research.google.com/drive/1_pqFmQXjOYG9Se4Zov4blIGeoYX6ViTJ?usp=sharing\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2103adb0-51f3-4240-bc5d-e27b6103cd8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "47dba08d-5829-417c-9c6c-bdb35ca846a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "AUDIO_MODEL = \"openai/whisper-medium\"\n",
    "\n",
    "# Load the speech model in float16 and move it to the GPU.\n",
    "speech_model = AutoModelForSpeechSeq2Seq.from_pretrained(\n",
    "    AUDIO_MODEL,\n",
    "    torch_dtype=torch.float16,\n",
    "    low_cpu_mem_usage=True,\n",
    "    use_safetensors=True,\n",
    ")\n",
    "speech_model.to('cuda')\n",
    "\n",
    "# The processor supplies both the tokenizer and the feature extractor for the pipeline.\n",
    "processor = AutoProcessor.from_pretrained(AUDIO_MODEL)\n",
    "\n",
    "asr_options = dict(\n",
    "    model=speech_model,\n",
    "    tokenizer=processor.tokenizer,\n",
    "    feature_extractor=processor.feature_extractor,\n",
    "    torch_dtype=torch.float16,\n",
    "    device='cuda',\n",
    "    return_timestamps=True,  # important if the audio is longer than 30 seconds\n",
    ")\n",
    "pipe = pipeline(\"automatic-speech-recognition\", **asr_options)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c35d6c76-01a9-495f-ad4e-84c98e320750",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Audio file handed to the speech-recognition pipeline.\n",
    "AUDIO_FILE = \"your-audio.mp3\"\n",
    "result = pipe(AUDIO_FILE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8fba2d46-b806-4bb3-b02d-e628343db986",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Take the 'text' entry of the pipeline result as the transcript and print it.\n",
    "transcription = result[\"text\"]\n",
    "print(transcription)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1778c4db-d003-4fb9-a0d0-6cfa71e6208d",
   "metadata": {},
   "source": [
    "## MODEL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9eb579a7-b5de-4537-8ad9-e3117b24c2ff",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c632023-9b37-4c0d-b43a-190aacbbd80d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Checkpoint name passed to from_pretrained for both the tokenizer and the causal LM.\n",
    "QWEN2 = \"Qwen/Qwen2-7B-Instruct\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "175814b9-81b2-4f75-bf40-9ef7cac492cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.\n",
    "quant_config = BitsAndBytesConfig(\n",
    "    load_in_4bit=True,\n",
    "    bnb_4bit_quant_type=\"nf4\",\n",
    "    bnb_4bit_use_double_quant=True,\n",
    "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8aaa160e-7c2b-4080-b24a-995df4469edd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chat prompt: the system message sets the task, the user message carries the transcript\n",
    "# produced by the Whisper cells above.\n",
    "system_message = (\n",
    "    \"You are an assistant that produces minutes of meetings from transcripts, \"\n",
    "    \"with a summary, key discussion points, takeaways and action items with owners, in markdown.\"\n",
    ")\n",
    "user_prompt = (\n",
    "    \"Below is the transcript of a meeting. Please write the minutes in markdown, including \"\n",
    "    \"a summary, discussion points, takeaways and action items with owners.\\n\\n\"\n",
    "    f\"{transcription}\"\n",
    ")\n",
    "messages = [\n",
    "    {\"role\": \"system\", \"content\": system_message},\n",
    "    {\"role\": \"user\", \"content\": user_prompt},\n",
    "]\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(QWEN2)\n",
    "# add_generation_prompt=True ends the prompt with the assistant turn header so the model\n",
    "# writes a reply instead of continuing the user message.\n",
    "inputs = tokenizer.apply_chat_template(messages, return_tensors=\"pt\", add_generation_prompt=True).to(\"cuda\")\n",
    "streamer = TextStreamer(tokenizer)  # prints the text to stdout while it is being generated\n",
    "model = AutoModelForCausalLM.from_pretrained(QWEN2, device_map=\"auto\", quantization_config=quant_config)\n",
    "outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "517443aa-d230-4248-88aa-b06efd8ee3cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# outputs[0] starts with the prompt tokens (system message + transcript); slice them off and\n",
    "# drop chat special tokens so only the generated minutes are rendered.\n",
    "generated_tokens = outputs[0][inputs.shape[-1]:]\n",
    "response = tokenizer.decode(generated_tokens, skip_special_tokens=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "47562f76-fd35-4eb0-a399-8e8f1fa054c3",
   "metadata": {},
   "source": [
    "## **For Markdown display**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f77fea1-0920-46e5-9230-d0e8b9f69353",
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import Markdown, display, update_display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35ac81e2-f960-4705-aaca-2385d8aa12d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render the model response as formatted Markdown instead of raw text.\n",
    "minutes_markdown = Markdown(response)\n",
    "display(minutes_markdown)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
